General results:
summarised_df <- results_data_frame |>
group_by(data_generation, data_fitted) |>
summarise(mean_point = mean(point, na.rm = TRUE),
mean_ci_length_norm = mean(conf_int_normal_upper - conf_int_normal_lower, na.rm = TRUE),
coverage_ci_norm = mean((conf_int_normal_lower < 1000) & (1000 < conf_int_normal_upper), na.rm = TRUE),
mean_ci_length_log_norm = mean(conf_int_log_normal_upper - conf_int_log_normal_lower, na.rm = TRUE),
coverage_ci_log_norm = mean((conf_int_log_normal_lower < 1000) & (1000 < conf_int_log_normal_upper), na.rm = TRUE),
succesful_fits = mean(!is.na(point)))
## `summarise()` has grouped output by 'data_generation'. You can override using
## the `.groups` argument.
print(summarised_df, n=20)
## # A tibble: 80 × 8
## # Groups: data_generation [12]
## data_generation data_fitted mean_point mean_ci_length_norm coverage_ci_norm
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Hurdleztgeom correct 1008. 168. 0.938
## 2 Hurdleztgeom negbin 1003. 340. 0.927
## 3 Hurdleztgeom oizt 873. 69.7 0
## 4 Hurdleztgeom poisson 700. 24.1 0
## 5 Hurdleztgeom wrong_link 1008. 168. 0.938
## 6 Hurdleztgeom ztHurdle 1094. 220. 0.654
## 7 Hurdleztgeom ztoi 874. 78.9 0
## 8 Hurdleztnegbin correct 999. 236. 0.914
## 9 Hurdleztnegbin geom 1188. 181. 0.004
## 10 Hurdleztnegbin oizt 990. 181. 0.864
## 11 Hurdleztnegbin poisson 816. 22.2 0
## 12 Hurdleztnegbin wrong_link 999. 237. 0.918
## 13 Hurdleztnegbin ztHurdle 23285419. 56970543. 0.934
## 14 Hurdleztnegbin ztoi 64086. 34427. 0.908
## 15 Hurdleztpoisson correct 1002. 104. 0.964
## 16 Hurdleztpoisson geometric 2279. 820. 0
## 17 Hurdleztpoisson oizt 908. 42.5 0
## 18 Hurdleztpoisson wrong_link 1003. 103. 0.962
## 19 Hurdleztpoisson ztHurdle 1057. 140. 0.687
## 20 Hurdleztpoisson ztoi 908. 42.5 0
## # ℹ 60 more rows
## # ℹ 3 more variables: mean_ci_length_log_norm <dbl>,
## # coverage_ci_log_norm <dbl>, succesful_fits <dbl>
pp <- summarised_df |>
subset(succesful_fits < 1) |>
as.data.frame() |>
mutate(data_generation = ordered(data_generation)) |>
ggplot(aes(y = succesful_fits, x = data_fitted)) +
geom_point() +
facet_wrap(~data_generation, scales = c("free_x"), ncol = 3) +
ylab("Fitted proportion") +
xlab("Model fitted") +
ggtitle("Proportion of succesfully fitted models by true distribution of counts")
pp
Visualising outliers (i.e. when estimated regression parameters tend to boundary):
outliers <- results_data_frame |>
subset(!is.na(point)) |>
subset(point > 5000) |>
group_by(data_generation, data_fitted) |>
summarise(n = n()) |>
ggplot(aes(x = data_fitted, weight = n)) +
geom_bar() +
facet_wrap(~ data_generation, scales = c("free_x")) +
ylab("Number of outliers") +
xlab("Model fitted") +
ggtitle("Exreme outliers (estimate > 5 * true size) by true distribution of counts")
## `summarise()` has grouped output by 'data_generation'. You can override using
## the `.groups` argument.
outliers
Results for counts generated by ztoipoisson:
p1 <- results_data_frame |>
subset(!is.na(point) & (data_generation == "ztoipoisson")) |>
subset(point < 5000) |>
ggplot(aes(x = data_fitted, y = point)) +
geom_jitter(alpha = 0.05, shape = 1) +
geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") +
geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
ylab("Point estimate") +
xlab("Model fitted")
p1
Summary statistics after excluding outliers:
results_data_frame |>
subset(!is.na(point) & (data_generation == "ztoipoisson")) |>
subset(point < 5000) |>
group_by(data_fitted) |>
summarise(bias = mean(point - 1000),
rel_bias = mean(point - 1000) / 1000,
MSE = mean((point - 1000)^2),
MAE = mean(abs(point - 1000)),
coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))
Results for counts generated by oiztpoisson:
p2 <- results_data_frame |>
subset(!is.na(point) & (data_generation == "oiztpoisson")) |>
subset(point < 5000) |>
ggplot(aes(x = data_fitted, y = point)) +
geom_jitter(alpha = 0.05, shape = 1) +
geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") +
geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
ylab("Point estimate") +
xlab("Model fitted")
p2
Summary statistics after excluding outliers:
results_data_frame |>
subset(!is.na(point) & (data_generation == "oiztpoisson")) |>
subset(point < 5000) |>
group_by(data_fitted) |>
summarise(bias = mean(point - 1000),
rel_bias = mean(point - 1000) / 1000,
MSE = mean((point - 1000)^2),
MAE = mean(abs(point - 1000)),
coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))
Results for counts generated by ztHurdlepoisson:
p3 <- results_data_frame |>
subset(!is.na(point) & (data_generation == "ztHurdlepoisson")) |>
subset(point < 5000) |>
ggplot(aes(x = data_fitted, y = point)) +
geom_jitter(alpha = 0.05, shape = 1) +
geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") +
geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
ylab("Point estimate") +
xlab("Model fitted")
p3
Summary statistics after excluding outliers:
results_data_frame |>
subset(!is.na(point) & (data_generation == "ztHurdlepoisson")) |>
subset(point < 5000) |>
group_by(data_fitted) |>
summarise(bias = mean(point - 1000),
rel_bias = mean(point - 1000) / 1000,
MSE = mean((point - 1000)^2),
MAE = mean(abs(point - 1000)),
coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))
Results for counts generated by hurdleztpoisson:
p4 <- results_data_frame |>
subset(!is.na(point) & (data_generation == "Hurdleztpoisson")) |>
subset(point < 5000) |>
ggplot(aes(x = data_fitted, y = point)) +
geom_jitter(alpha = 0.05, shape = 1) +
geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") +
geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
ylab("Point estimate") +
xlab("Model fitted")
p4
Summary statistics after excluding outliers:
results_data_frame |>
subset(!is.na(point) & (data_generation == "Hurdleztpoisson")) |>
subset(point < 5000) |>
group_by(data_fitted) |>
summarise(bias = mean(point - 1000),
rel_bias = mean(point - 1000) / 1000,
MSE = mean((point - 1000)^2),
MAE = mean(abs(point - 1000)),
coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))
Results for counts generated by ztoigeom:
p5 <- results_data_frame |>
subset(!is.na(point) & (data_generation == "ztoigeom")) |>
subset(point < 5000) |>
ggplot(aes(x = data_fitted, y = point)) +
geom_jitter(alpha = 0.05, shape = 1) +
geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") +
geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
ylab("Point estimate") +
xlab("Model fitted")
p5
Summary statistics after excluding outliers:
results_data_frame |>
subset(!is.na(point) & (data_generation == "ztoigeom")) |>
subset(point < 5000) |>
group_by(data_fitted) |>
summarise(bias = mean(point - 1000),
rel_bias = mean(point - 1000) / 1000,
MSE = mean((point - 1000)^2),
MAE = mean(abs(point - 1000)),
coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))
Results for counts generated by oiztgeom:
p6 <- results_data_frame |>
subset(!is.na(point) & (data_generation == "oiztgeom")) |>
subset(point < 5000) |>
ggplot(aes(x = data_fitted, y = point)) +
geom_jitter(alpha = 0.05, shape = 1) +
geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") +
geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
ylab("Point estimate") +
xlab("Model fitted")
p6
Summary statistics after excluding outliers:
results_data_frame |>
subset(!is.na(point) & (data_generation == "oiztgeom")) |>
subset(point < 5000) |>
group_by(data_fitted) |>
summarise(bias = mean(point - 1000),
rel_bias = mean(point - 1000) / 1000,
MSE = mean((point - 1000)^2),
MAE = mean(abs(point - 1000)),
coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))
Results for counts generated by ztHurdlegeom:
p7 <- results_data_frame |>
subset(!is.na(point) & (data_generation == "ztHurdlegeom")) |>
subset(point < 5000) |>
ggplot(aes(x = data_fitted, y = point)) +
geom_jitter(alpha = 0.05, shape = 1) +
geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") +
geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
ylab("Point estimate") +
xlab("Model fitted")
p7
Summary statistics after excluding outliers:
results_data_frame |>
subset(!is.na(point) & (data_generation == "ztHurdlegeom")) |>
subset(point < 5000) |>
group_by(data_fitted) |>
summarise(bias = mean(point - 1000),
rel_bias = mean(point - 1000) / 1000,
MSE = mean((point - 1000)^2),
MAE = mean(abs(point - 1000)),
coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))
Results for counts generated by hurdleztgeom:
p8 <- results_data_frame |>
subset(!is.na(point) & (data_generation == "Hurdleztgeom")) |>
subset(point < 5000) |>
ggplot(aes(x = data_fitted, y = point)) +
geom_jitter(alpha = 0.05, shape = 1) +
geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") +
geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
ylab("Point estimate") +
xlab("Model fitted")
p8
Summary statistics after excluding outliers:
results_data_frame |>
subset(!is.na(point) & (data_generation == "Hurdleztgeom")) |>
subset(point < 5000) |>
group_by(data_fitted) |>
summarise(bias = mean(point - 1000),
rel_bias = mean(point - 1000) / 1000,
MSE = mean((point - 1000)^2),
MAE = mean(abs(point - 1000)),
coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))
Results for counts generated by ztoinegbin:
p9 <- results_data_frame |>
subset(!is.na(point) & (data_generation == "ztoinegbin")) |>
subset(point < 5000) |>
ggplot(aes(x = data_fitted, y = point)) +
geom_jitter(alpha = 0.05, shape = 1) +
geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") +
geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
ylab("Point estimate") +
xlab("Model fitted")
p9
Summary statistics after excluding outliers:
results_data_frame |>
subset(!is.na(point) & (data_generation == "ztoinegbin")) |>
subset(point < 5000) |>
group_by(data_fitted) |>
summarise(bias = mean(point - 1000),
rel_bias = mean(point - 1000) / 1000,
MSE = mean((point - 1000)^2),
MAE = mean(abs(point - 1000)),
coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))
Results for counts generated by oiztnegbin:
p10 <- results_data_frame |>
subset(!is.na(point) & (data_generation == "oiztnegbin")) |>
subset(point < 5000) |>
ggplot(aes(x = data_fitted, y = point)) +
geom_jitter(alpha = 0.05, shape = 1) +
geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") +
geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
ylab("Point estimate") +
xlab("Model fitted")
p10
Summary statistics after excluding outliers:
results_data_frame |>
subset(!is.na(point) & (data_generation == "oiztnegbin")) |>
subset(point < 5000) |>
group_by(data_fitted) |>
summarise(bias = mean(point - 1000),
rel_bias = mean(point - 1000) / 1000,
MSE = mean((point - 1000)^2),
MAE = mean(abs(point - 1000)),
coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))
Results for counts generated by ztHurdlenegbin:
p11 <- results_data_frame |>
subset(!is.na(point) & (data_generation == "ztHurdlenegbin")) |>
subset(point < 5000) |>
ggplot(aes(x = data_fitted, y = point)) +
geom_jitter(alpha = 0.05, shape = 1) +
geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") +
geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
ylab("Point estimate") +
xlab("Model fitted")
p11
Summary statistics after excluding outliers:
results_data_frame |>
subset(!is.na(point) & (data_generation == "ztHurdlenegbin")) |>
subset(point < 5000) |>
group_by(data_fitted) |>
summarise(bias = mean(point - 1000),
rel_bias = mean(point - 1000) / 1000,
MSE = mean((point - 1000)^2),
MAE = mean(abs(point - 1000)),
coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))
Results for counts generated by hurdleztnegbin:
p12 <- results_data_frame |>
subset(!is.na(point) & (data_generation == "Hurdleztnegbin")) |>
subset(point < 5000) |>
ggplot(aes(x = data_fitted, y = point)) +
geom_jitter(alpha = 0.05, shape = 1) +
geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") +
geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
ylab("Point estimate") +
xlab("Model fitted")
p12
Summary statistics after excluding outliers:
results_data_frame |>
subset(!is.na(point) & (data_generation == "Hurdleztnegbin")) |>
subset(point < 5000) |>
group_by(data_fitted) |>
summarise(bias = mean(point - 1000),
rel_bias = mean(point - 1000) / 1000,
MSE = mean((point - 1000)^2),
MAE = mean(abs(point - 1000)),
coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))
Exact binomial tests for coverage of lognormal confindence intervals with \(H_0:p=0.95, H_1:p\neq0.95\):
dd <- results_data_frame |>
subset(!is.na(point)) |>
subset(point < 5000) |>
mutate(covr_norm = (conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000),
covr_log = (conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)) |>
group_by(data_generation, data_fitted) |>
summarise(n = n(),
mean = mean(covr_norm, na.rm = TRUE))
## `summarise()` has grouped output by 'data_generation'. You can override using
## the `.groups` argument.
dd <- cbind(dd, p_value = NA, lower = NA, upper = NA, signif = NA)
for (x in 1:NROW(dd)) {
jj <- binom.test(x = as.numeric(dd[x, 4]) * as.integer(dd[x, 3]), n = as.integer(dd[x, 3]), p = .95)
# this jj object has some very weird interactions with the rest of R ecosystem
dd[x, 5] <- jj$p.value |> as.numeric()
dd[x, 6] <- jj[[4]][1]
dd[x, 7] <- jj[[4]][2]
dd[x, 8] <- ifelse(dd[x, 5] < .001, "***", ifelse(dd[x, 5] < .01, "**", ifelse(dd[x, 5] < .05, "*", ifelse(dd[x, 5] < .1, ".", ""))))
}
dd |>
mutate(p_value = round(p_value, digits = 4),
lower = round(lower, digits = 4),
upper = round(upper, digits = 4)) |>
print(n = NROW(dd))
## # A tibble: 80 × 8
## # Groups: data_generation [12]
## data_generation data_fitted n mean p_value lower upper signif
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Hurdleztgeom correct 500 0.938 0.217 0.913 0.958 ""
## 2 Hurdleztgeom negbin 494 0.927 0.0292 0.900 0.948 "*"
## 3 Hurdleztgeom oizt 500 0 0 0 0.0074 "***"
## 4 Hurdleztgeom poisson 490 0 0 0 0.0075 "***"
## 5 Hurdleztgeom wrong_link 500 0.938 0.217 0.913 0.958 ""
## 6 Hurdleztgeom ztHurdle 500 0.654 0 0.610 0.696 "***"
## 7 Hurdleztgeom ztoi 500 0 0 0 0.0074 "***"
## 8 Hurdleztnegbin correct 500 0.914 0.0006 0.886 0.937 "***"
## 9 Hurdleztnegbin geom 500 0.004 0 0.0005 0.0144 "***"
## 10 Hurdleztnegbin oizt 500 0.864 0 0.831 0.893 "***"
## 11 Hurdleztnegbin poisson 500 0 0 0 0.0074 "***"
## 12 Hurdleztnegbin wrong_link 500 0.918 0.002 0.890 0.940 "**"
## 13 Hurdleztnegbin ztHurdle 492 0.945 0.604 0.921 0.964 ""
## 14 Hurdleztnegbin ztoi 497 0.913 0.0006 0.885 0.937 "***"
## 15 Hurdleztpoisson correct 499 0.964 0.181 0.944 0.978 ""
## 16 Hurdleztpoisson geometric 500 0 0 0 0.0074 "***"
## 17 Hurdleztpoisson oizt 497 0 0 0 0.0074 "***"
## 18 Hurdleztpoisson wrong_link 499 0.962 0.258 0.941 0.977 ""
## 19 Hurdleztpoisson ztHurdle 499 0.687 0 0.645 0.728 "***"
## 20 Hurdleztpoisson ztoi 497 0 0 0 0.0074 "***"
## 21 oiztgeom Hurdlezt 500 0.946 0.681 0.922 0.964 ""
## 22 oiztgeom correct 500 0.91 0.0002 0.881 0.934 "***"
## 23 oiztgeom negbin 494 0.850 0 0.816 0.880 "***"
## 24 oiztgeom poisson 491 0 0 0 0.0075 "***"
## 25 oiztgeom wrong_link 500 0.9 0 0.870 0.925 "***"
## 26 oiztgeom ztHurdle 500 0.004 0 0.0005 0.0144 "***"
## 27 oiztgeom ztoi 500 0.702 0 0.660 0.742 "***"
## 28 oiztnegbin Hurdlezt 500 0.99 0 0.977 0.997 "***"
## 29 oiztnegbin correct 500 0.986 0 0.971 0.994 "***"
## 30 oiztnegbin geom 500 0 0 0 0.0074 "***"
## 31 oiztnegbin poisson 499 0 0 0 0.0074 "***"
## 32 oiztnegbin wrong_link 500 0.988 0 0.974 0.996 "***"
## 33 oiztnegbin ztHurdle 491 0.0387 0 0.0235 0.0598 "***"
## 34 oiztnegbin ztoi 494 0.229 0 0.192 0.268 "***"
## 35 oiztpoisson Hurdlezt 499 0.754 0 0.713 0.791 "***"
## 36 oiztpoisson correct 495 0.931 0.0627 0.905 0.952 "."
## 37 oiztpoisson geometric 500 0 0 0 0.0074 "***"
## 38 oiztpoisson wrong_link 493 0.923 0.0093 0.896 0.945 "**"
## 39 oiztpoisson ztHurdle 499 0 0 0 0.0074 "***"
## 40 oiztpoisson ztoi 498 0.807 0 0.770 0.841 "***"
## 41 ztHurdlegeom Hurdlezt 500 0.86 0 0.826 0.889 "***"
## 42 ztHurdlegeom correct 500 0.96 0.355 0.939 0.975 ""
## 43 ztHurdlegeom negbin 486 0.932 0.0761 0.906 0.953 "."
## 44 ztHurdlegeom oizt 500 0 0 0 0.0074 "***"
## 45 ztHurdlegeom poisson 500 0 0 0 0.0074 "***"
## 46 ztHurdlegeom wrong_link 495 0.962 0.301 0.941 0.977 ""
## 47 ztHurdlegeom ztoi 500 0 0 0 0.0074 "***"
## 48 ztHurdlenegbin Hurdlezt 500 0.576 0 0.531 0.620 "***"
## 49 ztHurdlenegbin correct 495 0.941 0.354 0.917 0.960 ""
## 50 ztHurdlenegbin geom 500 0 0 0 0.0074 "***"
## 51 ztHurdlenegbin oizt 500 0.012 0 0.0044 0.0259 "***"
## 52 ztHurdlenegbin poisson 500 0 0 0 0.0074 "***"
## 53 ztHurdlenegbin wrong_link 497 0.942 0.409 0.917 0.961 ""
## 54 ztHurdlenegbin ztoi 499 0.0160 0 0.0069 0.0313 "***"
## 55 ztHurdlepoisson Hurdlezt 500 0.8 0 0.762 0.834 "***"
## 56 ztHurdlepoisson correct 500 0.946 0.681 0.922 0.964 ""
## 57 ztHurdlepoisson geometric 500 0 0 0 0.0074 "***"
## 58 ztHurdlepoisson oizt 499 0 0 0 0.0074 "***"
## 59 ztHurdlepoisson wrong_link 500 0.946 0.681 0.922 0.964 ""
## 60 ztHurdlepoisson ztoi 498 0 0 0 0.0074 "***"
## 61 ztoigeom Hurdlezt 500 0.774 0 0.735 0.810 "***"
## 62 ztoigeom correct 500 0.948 0.837 0.925 0.966 ""
## 63 ztoigeom negbin 440 0.868 0 0.833 0.898 "***"
## 64 ztoigeom oizt 500 0.83 0 0.794 0.862 "***"
## 65 ztoigeom poisson 497 0 0 0 0.0074 "***"
## 66 ztoigeom wrong_link 500 0.95 1 0.927 0.967 ""
## 67 ztoigeom ztHurdle 500 0.13 0 0.102 0.163 "***"
## 68 ztoinegbin Hurdlezt 500 0.55 0 0.505 0.594 "***"
## 69 ztoinegbin correct 493 0.921 0.0051 0.893 0.943 "**"
## 70 ztoinegbin geom 500 0 0 0 0.0074 "***"
## 71 ztoinegbin oizt 500 0.58 0 0.535 0.624 "***"
## 72 ztoinegbin poisson 500 0 0 0 0.0074 "***"
## 73 ztoinegbin wrong_link 494 0.925 0.0169 0.898 0.947 "*"
## 74 ztoinegbin ztHurdle 491 0.986 0 0.971 0.994 "***"
## 75 ztoipoisson Hurdlezt 499 0.465 0 0.420 0.510 "***"
## 76 ztoipoisson correct 496 0.938 0.215 0.912 0.957 ""
## 77 ztoipoisson geometric 500 0 0 0 0.0074 "***"
## 78 ztoipoisson oizt 490 0.829 0 0.792 0.861 "***"
## 79 ztoipoisson wrong_link 497 0.940 0.302 0.915 0.959 ""
## 80 ztoipoisson ztHurdle 499 0.0180 0 0.0083 0.034 "***"
## show only those that have high p value or have better coverage
dd |>
filter(p_value > .05 | mean > .95) |>
mutate(p_value = round(p_value, digits = 4),
lower = round(lower, digits = 4),
upper = round(upper, digits = 4)) |>
print(n = NROW(dd))
## # A tibble: 22 × 8
## # Groups: data_generation [12]
## data_generation data_fitted n mean p_value lower upper signif
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Hurdleztgeom correct 500 0.938 0.217 0.913 0.958 ""
## 2 Hurdleztgeom wrong_link 500 0.938 0.217 0.913 0.958 ""
## 3 Hurdleztnegbin ztHurdle 492 0.945 0.604 0.921 0.964 ""
## 4 Hurdleztpoisson correct 499 0.964 0.181 0.944 0.978 ""
## 5 Hurdleztpoisson wrong_link 499 0.962 0.258 0.941 0.977 ""
## 6 oiztgeom Hurdlezt 500 0.946 0.681 0.922 0.964 ""
## 7 oiztnegbin Hurdlezt 500 0.99 0 0.977 0.997 "***"
## 8 oiztnegbin correct 500 0.986 0 0.971 0.994 "***"
## 9 oiztnegbin wrong_link 500 0.988 0 0.974 0.996 "***"
## 10 oiztpoisson correct 495 0.931 0.0627 0.905 0.952 "."
## 11 ztHurdlegeom correct 500 0.96 0.355 0.939 0.975 ""
## 12 ztHurdlegeom negbin 486 0.932 0.0761 0.906 0.953 "."
## 13 ztHurdlegeom wrong_link 495 0.962 0.301 0.941 0.977 ""
## 14 ztHurdlenegbin correct 495 0.941 0.354 0.917 0.960 ""
## 15 ztHurdlenegbin wrong_link 497 0.942 0.409 0.917 0.961 ""
## 16 ztHurdlepoisson correct 500 0.946 0.681 0.922 0.964 ""
## 17 ztHurdlepoisson wrong_link 500 0.946 0.681 0.922 0.964 ""
## 18 ztoigeom correct 500 0.948 0.837 0.925 0.966 ""
## 19 ztoigeom wrong_link 500 0.95 1 0.927 0.967 ""
## 20 ztoinegbin ztHurdle 491 0.986 0 0.971 0.994 "***"
## 21 ztoipoisson correct 496 0.938 0.215 0.912 0.957 ""
## 22 ztoipoisson wrong_link 497 0.940 0.302 0.915 0.959 ""
Visual results with confidence intervals:
qq1 <- dd |>
ggplot(aes(x = data_fitted)) +
facet_wrap(~ data_generation, scales = c("free_x"), ncol = 3) +
geom_point(aes(y = mean), colour = "navy", cex = 2) +
geom_errorbar(aes(ymin = lower, ymax = upper), colour = "navy") +
ggtitle("Empirical coverage of studentized confidence intervals by true distribution of counts") +
xlab("Fitted distribution") +
ylab("Coverage")
qq1
Average sizes of confidence intervals:
qq2 <- results_data_frame |>
subset(!is.na(point)) |>
subset(point < 5000) |>
group_by(data_generation, data_fitted) |>
summarise(len = mean(conf_int_normal_upper - conf_int_normal_lower, na.rm = TRUE)) |>
ggplot(aes(x = data_fitted, weight = len)) +
geom_bar() +
facet_wrap(~ data_generation, scales = c("free_x"), ncol = 3) +
ylab("Average length") +
xlab("Fitted distribution") +
ggtitle("Empirical size of studentized confidence intervals by true distribution of counts")
## `summarise()` has grouped output by 'data_generation'. You can override using
## the `.groups` argument.
qq2
Exact binomial tests for coverage of normal confindence intervals with \(H_0:p=0.95, H_1:p\neq0.95\):
dd <- results_data_frame |>
subset(!is.na(point)) |>
subset(point < 5000) |>
mutate(covr_norm = (conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000),
covr_log = (conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)) |>
group_by(data_generation, data_fitted) |>
summarise(n = n(),
mean = mean(covr_log, na.rm = TRUE))
## `summarise()` has grouped output by 'data_generation'. You can override using
## the `.groups` argument.
dd <- cbind(dd, p_value = NA, lower = NA, upper = NA, signif = NA)
for (x in 1:NROW(dd)) {
jj <- binom.test(x = as.numeric(dd[x, 4]) * as.integer(dd[x, 3]), n = as.integer(dd[x, 3]), p = .95)
# this jj object has some very weird interactions with the rest of R ecosystem
dd[x, 5] <- jj$p.value |> as.numeric()
dd[x, 6] <- jj[[4]][1]
dd[x, 7] <- jj[[4]][2]
dd[x, 8] <- ifelse(dd[x, 5] < .001, "***", ifelse(dd[x, 5] < .01, "**", ifelse(dd[x, 5] < .05, "*", ifelse(dd[x, 5] < .1, ".", ""))))
}
dd |>
mutate(p_value = round(p_value, digits = 4),
lower = round(lower, digits = 4),
upper = round(upper, digits = 4)) |>
print(n = NROW(dd))
## # A tibble: 80 × 8
## # Groups: data_generation [12]
## data_generation data_fitted n mean p_value lower upper signif
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Hurdleztgeom correct 500 0.944 0.537 0.920 0.962 ""
## 2 Hurdleztgeom negbin 494 0.960 0.408 0.938 0.975 ""
## 3 Hurdleztgeom oizt 500 0 0 0 0.0074 "***"
## 4 Hurdleztgeom poisson 490 0 0 0 0.0075 "***"
## 5 Hurdleztgeom wrong_link 500 0.944 0.537 0.920 0.962 ""
## 6 Hurdleztgeom ztHurdle 500 0.542 0 0.497 0.586 "***"
## 7 Hurdleztgeom ztoi 500 0 0 0 0.0074 "***"
## 8 Hurdleztnegbin correct 500 0.948 0.837 0.925 0.966 ""
## 9 Hurdleztnegbin geom 500 0.002 0 0.0001 0.0111 "***"
## 10 Hurdleztnegbin oizt 500 0.902 0 0.872 0.927 "***"
## 11 Hurdleztnegbin poisson 500 0 0 0 0.0074 "***"
## 12 Hurdleztnegbin wrong_link 500 0.948 0.837 0.925 0.966 ""
## 13 Hurdleztnegbin ztHurdle 492 0.549 0 0.504 0.593 "***"
## 14 Hurdleztnegbin ztoi 497 0.901 0 0.872 0.926 "***"
## 15 Hurdleztpoisson correct 499 0.958 0.473 0.936 0.974 ""
## 16 Hurdleztpoisson geometric 500 0 0 0 0.0074 "***"
## 17 Hurdleztpoisson oizt 497 0 0 0 0.0074 "***"
## 18 Hurdleztpoisson wrong_link 499 0.954 0.758 0.932 0.971 ""
## 19 Hurdleztpoisson ztHurdle 499 0.553 0 0.508 0.597 "***"
## 20 Hurdleztpoisson ztoi 497 0 0 0 0.0074 "***"
## 21 oiztgeom Hurdlezt 500 0.952 0.918 0.929 0.969 ""
## 22 oiztgeom correct 500 0.892 0 0.861 0.918 "***"
## 23 oiztgeom negbin 494 0.844 0 0.809 0.875 "***"
## 24 oiztgeom poisson 491 0 0 0 0.0075 "***"
## 25 oiztgeom wrong_link 500 0.89 0 0.859 0.916 "***"
## 26 oiztgeom ztHurdle 500 0.004 0 0.0005 0.0144 "***"
## 27 oiztgeom ztoi 500 0.608 0 0.564 0.651 "***"
## 28 oiztnegbin Hurdlezt 500 0.844 0 0.809 0.875 "***"
## 29 oiztnegbin correct 500 0.832 0 0.796 0.864 "***"
## 30 oiztnegbin geom 500 0 0 0 0.0074 "***"
## 31 oiztnegbin poisson 499 0 0 0 0.0074 "***"
## 32 oiztnegbin wrong_link 500 0.832 0 0.796 0.864 "***"
## 33 oiztnegbin ztHurdle 491 0.00407 0 0.0005 0.0146 "***"
## 34 oiztnegbin ztoi 494 0.0466 0 0.0297 0.069 "***"
## 35 oiztpoisson Hurdlezt 499 0.836 0 0.800 0.867 "***"
## 36 oiztpoisson correct 495 0.929 0.0389 0.903 0.950 "*"
## 37 oiztpoisson geometric 500 0 0 0 0.0074 "***"
## 38 oiztpoisson wrong_link 493 0.927 0.0229 0.900 0.948 "*"
## 39 oiztpoisson ztHurdle 499 0 0 0 0.0074 "***"
## 40 oiztpoisson ztoi 498 0.717 0 0.675 0.756 "***"
## 41 ztHurdlegeom Hurdlezt 500 0.892 0 0.861 0.918 "***"
## 42 ztHurdlegeom correct 500 0.956 0.608 0.934 0.972 ""
## 43 ztHurdlegeom negbin 486 0.938 0.251 0.913 0.958 ""
## 44 ztHurdlegeom oizt 500 0 0 0 0.0074 "***"
## 45 ztHurdlegeom poisson 500 0 0 0 0.0074 "***"
## 46 ztHurdlegeom wrong_link 495 0.958 0.535 0.936 0.974 ""
## 47 ztHurdlegeom ztoi 500 0 0 0 0.0074 "***"
## 48 ztHurdlenegbin Hurdlezt 500 0.752 0 0.712 0.789 "***"
## 49 ztHurdlenegbin correct 495 0.966 0.121 0.946 0.980 ""
## 50 ztHurdlenegbin geom 500 0 0 0 0.0074 "***"
## 51 ztHurdlenegbin oizt 500 0.04 0 0.0246 0.0611 "***"
## 52 ztHurdlenegbin poisson 500 0 0 0 0.0074 "***"
## 53 ztHurdlenegbin wrong_link 497 0.966 0.122 0.946 0.98 ""
## 54 ztHurdlenegbin ztoi 499 0.0341 0 0.02 0.054 "***"
## 55 ztHurdlepoisson Hurdlezt 500 0.87 0 0.837 0.898 "***"
## 56 ztHurdlepoisson correct 500 0.952 0.918 0.929 0.969 ""
## 57 ztHurdlepoisson geometric 500 0 0 0 0.0074 "***"
## 58 ztHurdlepoisson oizt 499 0 0 0 0.0074 "***"
## 59 ztHurdlepoisson wrong_link 500 0.952 0.918 0.929 0.969 ""
## 60 ztHurdlepoisson ztoi 498 0 0 0 0.0074 "***"
## 61 ztoigeom Hurdlezt 500 0.822 0 0.786 0.854 "***"
## 62 ztoigeom correct 500 0.936 0.150 0.911 0.956 ""
## 63 ztoigeom negbin 440 0.861 0 0.826 0.892 "***"
## 64 ztoigeom oizt 500 0.856 0 0.822 0.886 "***"
## 65 ztoigeom poisson 497 0 0 0 0.0074 "***"
## 66 ztoigeom wrong_link 500 0.934 0.101 0.909 0.954 ""
## 67 ztoigeom ztHurdle 500 0.068 0 0.0475 0.0937 "***"
## 68 ztoinegbin Hurdlezt 500 0.682 0 0.639 0.723 "***"
## 69 ztoinegbin correct 493 0.945 0.605 0.921 0.964 ""
## 70 ztoinegbin geom 500 0 0 0 0.0074 "***"
## 71 ztoinegbin oizt 500 0.702 0 0.660 0.742 "***"
## 72 ztoinegbin poisson 500 0 0 0 0.0074 "***"
## 73 ztoinegbin wrong_link 494 0.951 1 0.929 0.969 ""
## 74 ztoinegbin ztHurdle 491 0.768 0 0.728 0.804 "***"
## 75 ztoipoisson Hurdlezt 499 0.599 0 0.555 0.642 "***"
## 76 ztoipoisson correct 496 0.938 0.215 0.912 0.957 ""
## 77 ztoipoisson geometric 500 0 0 0 0.0074 "***"
## 78 ztoipoisson oizt 490 0.876 0 0.843 0.903 "***"
## 79 ztoipoisson wrong_link 497 0.940 0.302 0.915 0.959 ""
## 80 ztoipoisson ztHurdle 499 0.00802 0 0.0022 0.0204 "***"
## show only those that have high p value or have better coverage
dd |>
filter(p_value > .05 | mean > .95) |>
mutate(p_value = round(p_value, digits = 4),
lower = round(lower, digits = 4),
upper = round(upper, digits = 4)) |>
print(n = NROW(dd))
## # A tibble: 21 × 8
## # Groups: data_generation [10]
## data_generation data_fitted n mean p_value lower upper signif
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Hurdleztgeom correct 500 0.944 0.537 0.920 0.962 ""
## 2 Hurdleztgeom negbin 494 0.960 0.408 0.938 0.975 ""
## 3 Hurdleztgeom wrong_link 500 0.944 0.537 0.920 0.962 ""
## 4 Hurdleztnegbin correct 500 0.948 0.837 0.925 0.966 ""
## 5 Hurdleztnegbin wrong_link 500 0.948 0.837 0.925 0.966 ""
## 6 Hurdleztpoisson correct 499 0.958 0.473 0.936 0.974 ""
## 7 Hurdleztpoisson wrong_link 499 0.954 0.758 0.932 0.971 ""
## 8 oiztgeom Hurdlezt 500 0.952 0.918 0.929 0.969 ""
## 9 ztHurdlegeom correct 500 0.956 0.608 0.934 0.972 ""
## 10 ztHurdlegeom negbin 486 0.938 0.251 0.913 0.958 ""
## 11 ztHurdlegeom wrong_link 495 0.958 0.535 0.936 0.974 ""
## 12 ztHurdlenegbin correct 495 0.966 0.121 0.946 0.980 ""
## 13 ztHurdlenegbin wrong_link 497 0.966 0.122 0.946 0.98 ""
## 14 ztHurdlepoisson correct 500 0.952 0.918 0.929 0.969 ""
## 15 ztHurdlepoisson wrong_link 500 0.952 0.918 0.929 0.969 ""
## 16 ztoigeom correct 500 0.936 0.150 0.911 0.956 ""
## 17 ztoigeom wrong_link 500 0.934 0.101 0.909 0.954 ""
## 18 ztoinegbin correct 493 0.945 0.605 0.921 0.964 ""
## 19 ztoinegbin wrong_link 494 0.951 1 0.929 0.969 ""
## 20 ztoipoisson correct 496 0.938 0.215 0.912 0.957 ""
## 21 ztoipoisson wrong_link 497 0.940 0.302 0.915 0.959 ""
Visual results with confidence intervals:
qq3 <- dd |>
ggplot(aes(x = data_fitted)) +
facet_wrap(~ data_generation, scales = c("free_x"), ncol = 3) +
geom_point(aes(y = mean), colour = "navy", cex = 2) +
geom_errorbar(aes(ymin = lower, ymax = upper), colour = "navy") +
ggtitle("Empirical coverage of log normal confidence intervals by true distribution of counts") +
xlab("Fitted distribution") +
ylab("Coverage")
qq3
Average sizes of confidence intervals:
qq4 <- results_data_frame |>
subset(!is.na(point)) |>
subset(point < 5000) |>
group_by(data_generation, data_fitted) |>
summarise(len = mean(conf_int_log_normal_upper - conf_int_log_normal_lower, na.rm = TRUE)) |>
ggplot(aes(x = data_fitted, weight = len)) +
geom_bar() +
facet_wrap(~ data_generation, scales = "free", ncol = 3) +
ylab("Average length") +
xlab("Fitted distribution") +
ggtitle("Empirical size of log normal confidence intervals by true distribution of counts")
## `summarise()` has grouped output by 'data_generation'. You can override using
## the `.groups` argument.
qq4